{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "41762069",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "157dd838",
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow.compat.v1 as tf\n",
    "import tensorflow_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c5cf4c14",
   "metadata": {},
   "outputs": [],
   "source": [
    "import struct\n",
    "\n",
    "# Four 0xff bytes, i.e. -1 as a little-endian int32; load_graph writes it over\n",
    "# the first entry of the shape constants it patches.\n",
    "unknown = b'\\xff\\xff\\xff\\xff'\n",
    "\n",
    "def load_graph(frozen_graph_filename):\n",
    "    \"\"\"Parse a frozen GraphDef file, patch its nodes and import it into a new graph.\n",
    "\n",
    "    Patches applied to the parsed GraphDef before it is imported:\n",
    "\n",
    "    * 'RefSwitch', 'AssignSub', 'AssignAdd' and 'Assign' nodes are renamed to\n",
    "      'Switch', 'Sub', 'Add' and 'Identity'; their 'use_locking' /\n",
    "      'validate_shape' attributes are removed.\n",
    "    * Nodes whose name contains 'Reshape/shape' or 'Reshape_1/shape' get the\n",
    "      first entry of their constant value replaced with -1.\n",
    "\n",
    "    Args:\n",
    "        frozen_graph_filename: path of the serialized GraphDef to read.\n",
    "\n",
    "    Returns:\n",
    "        A new tf.Graph containing the patched nodes. tf.import_graph_def is\n",
    "        called without a name, so tensors are found under the 'import/' prefix.\n",
    "    \"\"\"\n",
    "    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n",
    "        graph_def = tf.GraphDef()\n",
    "        graph_def.ParseFromString(f.read())\n",
    "\n",
    "    for node in graph_def.node:\n",
    "\n",
    "        if node.op == 'RefSwitch':\n",
    "            node.op = 'Switch'\n",
    "            # range, not the Python 2 only xrange: on the Python 3 kernel this\n",
    "            # notebook declares, xrange raises NameError.\n",
    "            for index in range(len(node.input)):\n",
    "                if 'moving_' in node.input[index]:\n",
    "                    node.input[index] = node.input[index] + '/read'\n",
    "        elif node.op == 'AssignSub':\n",
    "            node.op = 'Sub'\n",
    "            if 'use_locking' in node.attr:\n",
    "                del node.attr['use_locking']\n",
    "        elif node.op == 'AssignAdd':\n",
    "            node.op = 'Add'\n",
    "            if 'use_locking' in node.attr:\n",
    "                del node.attr['use_locking']\n",
    "        elif node.op == 'Assign':\n",
    "            node.op = 'Identity'\n",
    "            if 'use_locking' in node.attr:\n",
    "                del node.attr['use_locking']\n",
    "            if 'validate_shape' in node.attr:\n",
    "                del node.attr['validate_shape']\n",
    "            if len(node.input) == 2:\n",
    "                # Keep only the second input.\n",
    "                node.input[0] = node.input[1]\n",
    "                del node.input[1]\n",
    "\n",
    "        is_reshape_shape = 'Reshape/shape' in node.name or 'Reshape_1/shape' in node.name\n",
    "        # Only patch nodes that already carry a 'value' attr: indexing the attr\n",
    "        # map with a missing key would insert an empty entry into the node.\n",
    "        if is_reshape_shape and 'value' in node.attr:\n",
    "            b = node.attr['value'].tensor.tensor_content\n",
    "            # Assumes tensor_content is packed little-endian int32 -- TODO confirm.\n",
    "            # signed = True keeps negative entries (such as -1) inside the range\n",
    "            # that struct.pack('<i', ...) accepts below.\n",
    "            arr_int = [int.from_bytes(b[p:p + 4], 'little', signed = True) for p in range(0, len(b), 4)]\n",
    "            if len(arr_int):\n",
    "                arr_byte = [unknown] + [struct.pack('<i', v) for v in arr_int[1:]]\n",
    "                arr_byte = b''.join(arr_byte)\n",
    "                node.attr['value'].tensor.tensor_content = arr_byte\n",
    "\n",
    "            if len(node.attr['value'].tensor.int_val):\n",
    "                node.attr['value'].tensor.int_val[0] = -1\n",
    "\n",
    "    with tf.Graph().as_default() as graph:\n",
    "        tf.import_graph_def(graph_def)\n",
    "    return graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b3b9b2fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "g = load_graph('out/frozen_model.pb')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "49b7c54c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(<tf.Tensor 'import/inputs:0' shape=(None,) dtype=string>,\n",
       " <tf.Tensor 'import/SelectV2_3:0' shape=(None, 256) dtype=int32>)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "i = g.get_tensor_by_name('import/inputs:0')\n",
    "o = g.get_tensor_by_name('import/SelectV2_3:0')\n",
    "i, o"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "78f9f46d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-07-07 21:46:55.075064: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2022-07-07 21:46:55.081795: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2022-07-07 21:46:55.082583: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2022-07-07 21:46:55.084030: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2022-07-07 21:46:55.084706: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2022-07-07 21:46:55.085543: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2022-07-07 21:46:55.086312: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2022-07-07 21:46:55.329468: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2022-07-07 21:46:55.330244: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2022-07-07 21:46:55.330969: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2022-07-07 21:46:55.331654: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
      "2022-07-07 21:46:55.331674: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20404 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090 Ti, pci bus id: 0000:01:00.0, compute capability: 8.6\n"
     ]
    }
   ],
   "source": [
    "test_sess = tf.Session(graph = g)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e9801cc2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-07-07 21:46:57.278733: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.51 s, sys: 959 ms, total: 2.47 s\n",
      "Wall time: 2.35 s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-07-07 21:46:57.656368: I tensorflow/stream_executor/cuda/cuda_blas.cc:1760] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(3, 256)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "o_ = test_sess.run(o, feed_dict = {i: [\n",
    "    'terjemah Melayu ke Inggeris: i like u',\n",
    "    'terjemah Melayu ke Inggeris: hidup ini',\n",
    "    'terjemah Melayu ke Inggeris: ak tak paham la.',\n",
    "]})\n",
    "o_.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2c705d4d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sentencepiece as spm\n",
    "\n",
    "DEFAULT_SPM_PATH = 'sp10m.cased.ms-en.model'\n",
    "sp_model = spm.SentencePieceProcessor()\n",
    "sp_model.Load(DEFAULT_SPM_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9be24ba3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 I'm like u\n",
      "1 life\n",
      "2 I don't understand it.\n"
     ]
    }
   ],
   "source": [
    "# Turn every row of ids returned by the session back into text.\n",
    "for k, row in enumerate(o_):\n",
    "    print(k, sp_model.DecodeIds(row.tolist()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "69659b1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read both test files and split them on newlines. `left` feeds the model,\n",
    "# `right` supplies the references for scoring.\n",
    "# The encoding is passed explicitly so decoding does not depend on the locale\n",
    "# default (assumes the files are UTF-8 -- TODO confirm).\n",
    "with open('ms-en-right.test', encoding = 'utf-8') as fopen:\n",
    "    right = fopen.read().split('\\n')\n",
    "\n",
    "with open('ms-en-left.test', encoding = 'utf-8') as fopen:\n",
    "    left = fopen.read().split('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "7edad91c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5475/5475 [30:40<00:00,  2.98it/s]\n"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "batch_size = 16\n",
    "\n",
    "results = []\n",
    "for k in tqdm(range(0, len(left), batch_size)):\n",
    "    inputs = [f'terjemah Melayu ke Inggeris: {s}' for s in left[k:k + batch_size]]\n",
    "    o_ = test_sess.run(o, feed_dict = {i: inputs})\n",
    "    results.extend(o_.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "c8b38606",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import json\n",
    "\n",
    "# with open('temp.json', 'w') as fopen:\n",
    "#     json.dump(results, fopen)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b9ce8f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "with open('temp.json') as fopen:\n",
    "    results = json.load(fopen)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "6e5082e7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "87596"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "78a99e62",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sacrebleu.metrics import BLEU, CHRF, TER\n",
    "\n",
    "bleu = BLEU()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "9bc0cabd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"The Penal Code (Malaysia) makes any sexual connection between men and women under the age of 16 as rape either committed by the woman's loyalty or not.\""
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r = results[0]\n",
    "sp_model.DecodeIds(r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "70b8e32f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decode every result and keep only the non-empty ones, each paired with the\n",
    "# entry of `right` at the same index.\n",
    "filtered_left = []\n",
    "filtered_right = []\n",
    "for no, r in enumerate(results):\n",
    "    r = sp_model.DecodeIds(r)\n",
    "    if not r:\n",
    "        continue\n",
    "    filtered_left.append(r)\n",
    "    filtered_right.append(right[no])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "4136653c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The references are wrapped in a one-element list; the decoded outputs are\n",
    "# passed as they are.\n",
    "refs, sys = [filtered_right], filtered_left"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "49f7dff3",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "That's 100 lines that end in a tokenized period ('.')\n",
      "It looks like you forgot to detokenize your test data, which may hurt your score.\n",
      "If you insist your data is detokenized, or don't care, you can suppress this message with the `force` parameter.\n"
     ]
    }
   ],
   "source": [
    "r = bleu.corpus_score(sys, refs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "7c85ed40",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'name': 'BLEU',\n",
       " 'score': 64.1407496264804,\n",
       " '_mean': -1.0,\n",
       " '_ci': -1.0,\n",
       " '_verbose': '82.3/67.8/58.7/51.8 (BP = 0.999 ratio = 0.999 hyp_len = 1999775 ref_len = 2001100)',\n",
       " 'bp': 0.9993376449144036,\n",
       " 'counts': [1645886, 1297031, 1071421, 899662],\n",
       " 'totals': [1999775, 1912180, 1824756, 1737752],\n",
       " 'sys_len': 1999775,\n",
       " 'ref_len': 2001100,\n",
       " 'precisions': [82.30355915040442,\n",
       "  67.82996370634564,\n",
       "  58.71585022874291,\n",
       "  51.77159916950175],\n",
       " 'prec_str': '82.3/67.8/58.7/51.8',\n",
       " 'ratio': 0.9993378641747039}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r.__dict__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15c6d138",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
